* ---------------------------------------------------------------------------
* Run configuration.
* Session limits are set first, then the local macros that the aggregation
* loop further down expands. Local macros are not touched by "clear all",
* so the order of the two groups does not matter.
* ---------------------------------------------------------------------------

* Earlier, argument-driven invocation (kept for reference, not active):
*args start_year T  mintokens maxtokens cutter where year month weirdo path
*reading_results_20170923.do '+str(start_year)+' '+str(topics_nr)+' '+str(mintoken)+' '+str(maxtoken)+' '+where)
*display "staryear"

* --- session setup ---------------------------------------------------------
clear all
set mem 2000m
set maxvar 30000

* --- locations -------------------------------------------------------------
* Active root folder (no trailing separator; callers append one).
local path "D:\Dropbox\text analysis sandbox\programs\hard_problem\output"
* Alternative roots (kept for reference, not active):
*local path "/Users/christopherrauh/Dropbox/text analysis sandbox/programs/hard_problem/output/"
*local path "/Users/christopherrauh/Dropbox/text analysis sandbox/data/estimated_topics/"

* Sub-folder appended directly to the root; carries its own leading separator.
local where "\superall"

* --- model / file-name parameters -------------------------------------------
local start_year 0
local T 15
local mintokens 1
local maxtokens 3
local cutter 200
local weirdo 0

* --- date parameters ---------------------------------------------------------
local year 2019
local month 12

* Token-range tag used in file names: <mintokens>_<maxtokens>
local toke_int "`mintokens'_`maxtokens'"

* ---------------------------------------------------------------------------
* Aggregate per-article topic tables to country-year-month totals.
*
* For every (topic count t, token range l, cutter c, year y, month m):
*   1. read topic_table_svi_<...>.csv if it exists (otherwise skip the month),
*   2. attach article metadata and token counts by article_id, keeping only
*      articles found in all three sources,
*   3. harmonise country spellings,
*   4. turn topic probabilities into token-weighted counts (words0, words1, ...),
*   5. sum counts, tokens and an article counter by country-year-month,
*   6. save the result next to the inputs and in the estimated_topics folder.
*
* Expects these locals from the configuration section: path, where,
* start_year, weirdo, cutter, toke_int.
* ---------------------------------------------------------------------------
foreach t in 15 {
	foreach l in `toke_int' {
		foreach c in `cutter' {
			forval y = 2020 / 2020 {
				* Only months 1-8 are processed for 2020; any other year gets all 12.
				local M = cond(`y' == 2020, 8, 12)

				forval m = 1 / `M' {

					clear all

					* Suffix shared by the input table and both output files.
					local stem "`start_year'_`l'_`t'_`c'_`weirdo'_`y'_`m'"
					local csv "`path'`where'/topic_table_svi_`stem'.csv"

					* Skip months without an input table, but say so in the log.
					capture confirm file "`csv'"
					if _rc != 0 {
						display as text "note: no topic table for `y'-`m' (t=`t', l=`l', c=`c'); skipped"
						continue
					}

					insheet using "`csv'", comma
					rename document article_id

					* Old-style merge: both sides must be sorted by article_id.
					* NOTE(review): the using files are assumed to be stored sorted
					* by article_id -- confirm before switching to merge 1:1 / m:1.
					sort article_id
					merge article_id using "`path'/data_info_all_svi"
					keep if _merge == 3
					drop _merge

					sort article_id
					merge article_id using "`path'`where'/token_info_svi_`l'_`start_year'_`weirdo'"
					keep if _merge == 3
					drop _merge

					* ---- country names, step 1: long forms to short working names.
					* The three steps are order-dependent: step 3 consumes the
					* names produced by steps 1 and 2. Do not reorder.
					replace country = "Guinea Bissau" if country == "Guinea-Bissau"
					replace country = "Ivory Coast" if country == "Cote d'Ivoire"
					replace country = "Libya" if country == "Libyan Arab Jamahiriya"
					replace country = "Malasya" if country == "Malaysia"
					replace country = "Laos" if country == "Lao People's Democratic Republic"
					replace country = "Kazakhstan" if country == "Kazakstan"
					replace country = "Russia" if country == "Russian Federation"
					replace country = "Serbia" if country == "Serbia and Montenegro"
					replace country = "Singapure" if country == "Singapore"
					replace country = "Vietnam" if country == "Viet Nam"
					replace country = "Syria" if country == "Syrian Arab Republic"
					replace country = "South Korea" if country == "Korea"
					replace country = "Macau" if country == "Macau (Aomen)"

					* ---- country names, step 2: misspellings and variants ("New from Baba").
					* The repeated Malaysia and Singapore rules of the original are
					* omitted: step 1 already applied them and nothing in between
					* recreates those values.
					replace country = "Antigua and Barbuda" if country == "Antigua"
					replace country = "Brunei Darussalam" if country == "Brunei"
					replace country = "Burkina Faso" if country == "Burkina faso"
					replace country = "Congo (Democratic Republic of the)" if country == "Congo Democratic Republic"
					replace country = "Congo (Democratic Republic of the)" if country == "Congo Democratic"
					replace country = "Congo" if country == "Congo Republic"
					replace country = "Costa Rica" if country == "Costa rica"
					replace country = "Ivory Coast" if country == "Cote d'ivoire"
					replace country = "Czech Republic" if country == "Czech"
					replace country = "Dominican Republic" if country == "Dominica Republic"
					replace country = "East Timor" if country == "East timor"
					replace country = "El Salvador" if country == "El savador"
					replace country = "El Salvador" if country == "El Savador"
					replace country = "Guinea Bissau" if country == "Guninea-bissau"
					replace country = "Hong Kong" if country == "Hongkong"
					replace country = "Marshall Islands" if country == "Marshall Island"
					replace country = "Netherlands" if country == "Netherland"
					replace country = "New Zealand" if country == "New zealand"
					replace country = "Puerto Rico" if country == "Puerto rico"
					replace country = "Saint Kitts-Nevis" if country == "Saint Kitts and Nevis"
					replace country = "Saint Vincent and the Grenadines" if country == "Saint Vincent and Grenadines"
					replace country = "Sao Tome and Principe" if country == "Saint tome"
					replace country = "Sao Tome and Principe" if country == "Saint tome abd principe"
					replace country = "Saudi Arabia" if country == "Saudi arabia"
					replace country = "Serbia" if country == "Serbia and Montengro"
					replace country = "Solomon Island" if country == "Solomon Islands"
					replace country = "South Korea" if country == "South korea"
					replace country = "South Sudan" if country == "South sudan"
					replace country = "Trinidad and Tobago" if country == "Trinidad and tobago"
					replace country = "United Arab Emirates" if country == "United Arab Emirate"
					replace country = "United States" if country == "United States of America"

					* ---- country names, step 3: working names back to the long forms.
					replace country = "United States of America" if country == "United States"
					replace country = "Guinea-Bissau" if country == "Guinea Bissau"
					replace country = "Cote d'Ivoire" if country == "Ivory Coast"
					replace country = "Kazakstan" if country == "Kazakhstan"
					replace country = "Lao People's Democratic Republic" if country == "Laos"
					replace country = "Libyan Arab Jamahiriya" if country == "Libya"
					replace country = "Macau (Aomen)" if country == "Macau"
					replace country = "Malaysia" if country == "Malasya"
					replace country = "Russian Federation" if country == "Russia"
					replace country = "Saint Kitts and Nevis" if country == "Saint Kitts-Nevis"
					replace country = "Serbia and Montenegro" if country == "Serbia"
					replace country = "Singapore" if country == "Singapure"
					replace country = "Solomon Islands" if country == "Solomon Island"
					replace country = "Korea" if country == "South Korea"
					replace country = "Syrian Arab Republic" if country == "Syria"
					replace country = "Viet Nam" if country == "Vietnam"

					* Weight each topic probability by the article's token count so
					* that the values can be summed across articles when aggregating.
					local last_topic = `t' - 1
					forval s = 0 / `last_topic' {
						quietly gen words`s' = pr_topic_`s' * tokens
					}

					* Article counter: sums to the number of articles per cell.
					gen obs = 1

					* Keep only what the collapse uses. The original also dropped
					* "quarter" and kept "paper" here; collapse discards both anyway,
					* and naming them made the run abort when either was absent.
					sort country year month
					keep country year month words* tokens obs

					collapse (sum) words* tokens obs, by(country year month) fast

					sort country year month
					save "`path'`where'/topic_info_svi_`stem'", replace
					save "D:\Dropbox\text analysis sandbox\data\estimated_topics\topic_info_svi_`stem'", replace
				}
			}
		}
	}
	*erase `path'temp_token.dta
	* Nothing in the visible script writes temp.dta, and a plain erase stops
	* with r(601) when the file is missing. capture makes the cleanup
	* best-effort: the file is still removed if a previous step left it behind.
	capture erase "`path'/temp.dta"
	*erase `path'`where'/temp_roll.dta

}
